VIBRANT Trial - Primary Analyses

Author

Laura Symul, Laura Vermeren, Susan Holmes

Code
library(tidyverse)
library(magrittr)
library(gt)
library(patchwork)
library(SummarizedExperiment)
library(tidySummarizedExperiment)
library(brms)
library(gtsummary)
library(labelled)
library(scales)
# library(gridExtra)
library(ggpubr)
# library(mia) # BiocManager::install("mia")

theme_set(theme_light())
tmp <- fs::dir_map("R", source)
tmp <- fs::dir_map("../VIBRANT-99-utils/R/", source)
rm(tmp)

This document presents the primary analyses of the VIBRANT trial.

Loading the data

Code
mae_files <- 
  fs::dir_ls(str_c(data_dir(), "04 unblinded MAEs/"), regexp = "mae_full_.*\\.rds$") |> 
  sort(decreasing = TRUE) |>
  magrittr::extract(1)

mae <- readRDS(mae_files)

rm(mae_files)

Analyses

Demographics (Table 1)

Summary table

We create a table1_data data.frame from the participant_crfs_merged table stored in the @metadata slot of the mae.

This table contains almost all variables needed to create table 1 (i.e., site, age, race, ethn, cut_size_meals, eat_less, hungry_did_not_eat, sexual_partners_lifetime, sexual_partners_past_month, contraception_method).

In addition, the mae@colData contains the participants’ arm and study population flags (here we keep the mITT population).

From the variables listed above, we create the variable food_insecurity using the three variables cut_size_meals, eat_less and hungry_did_not_eat: if any of these three variables is “Yes”, then the participant is considered to be affected by food insecurity.

Code
table1_data <- 
  metadata(mae)$participant_crfs_merged |> 
  dplyr::left_join(
    colData(mae) |> as_tibble() |> select(pid, site, arm, ITT, mITT, PP) |> distinct(), 
    by = join_by(pid, site)
    ) |> 
  filter(mITT) |> 
  select(pid, site, arm, age, race, ethn, cut_size_meals, eat_less, hungry_did_not_eat, sexual_partners_lifetime, sexual_partners_past_month, contraception_method) |> 
  mutate(
    food_insecurity = ifelse(
      cut_size_meals == "Yes" | eat_less == "Yes" | hungry_did_not_eat == "Yes",
      "Yes",
      "No"
    ),
    food_insecurity = factor(food_insecurity, levels = c("No", "Yes"))
  ) |> 
  select(-c(eat_less, cut_size_meals, hungry_did_not_eat))
Code
table1_data |> 
  ggplot(aes(x = food_insecurity)) +
  geom_bar() +
  labs(x= "Food insecurity", y = "Count")+
  theme_classic()

We also need to include the participants’ partner’s gender.

Participants are asked about their partner’s gender at visits 1000, 1100, 1200, 1300, 1400, 1500, 1700, 1900, 2120 in CRF 19.

The variable past_week_sex_partner of the mae@metadata$visits_crfs_merged table will be summarize into one of these 4 categories:

  • No sex
  • Only male
  • Only female
  • Both

We add this variable to the table1_data table.

Code
gender_sexual_partner <- 
  metadata(mae)$visits_crfs_merged |>
  select(pid, past_week_sex_partner) |>
  filter(!is.na(past_week_sex_partner)) |>
  group_by(pid) |>
  summarise(
    gender_sexual_partner = 
      case_when(
        all(past_week_sex_partner == "No sex in the past week") ~ "No sex",
        any(past_week_sex_partner %in% c("A single male partner", "Multiple male partners")) &
          !any(past_week_sex_partner %in% c("A single female partner", "Multiple female partners")) ~ "Only man",
        any(past_week_sex_partner %in% c("A single female partner", "Multiple female partners")) &
          !any(past_week_sex_partner %in% c("A single male partner", "Multiple male partners")) ~ "Only female",
        any(past_week_sex_partner %in% c("A single male partner", "Multiple male partners")) &
          any(past_week_sex_partner %in% c("A single female partner", "Multiple female partners")) ~ "Both",
        TRUE ~ NA_character_
      ),
    .groups = "drop"
  )

table1_data <- 
  table1_data |> 
  left_join(gender_sexual_partner, by = "pid")

We also re-code the race variable as follows:

  • “Asian or South Asian” is re-coded to “Asian” (for brievity)
  • “Black, African American, or African” and “African” are re-coded to “Black” (for brievity)
  • “Coloured”, “White”, “Other”, and “Prefered not to answer” are left as is.
Code
table1_data <-
  table1_data |>
  
  mutate(
    across(where(is.character), as.factor),
    
    race = fct_recode(
      race,
      "Asian" = "Asian or South Asian",
      "Black" = "Black, African American, or African",
      "Black" = "African",
      "Coloured" = "Coloured",
      "White" = "White",
      "Other" = "Other",
      "Prefer not to answer" = "Prefer not to answer"
    ),
    
    race = fct_relevel(race, "Asian", "Black", "Coloured", "White", "Other", "Prefer not to answer"),
    arm = arm |> fct_drop()
  ) |>
  
  set_variable_labels(
    food_insecurity = "Report food insecurity in past 12 months ",
    sexual_partners_past_month = "Number of partners in past month",
    sexual_partners_lifetime = "Number of partners in lifetime",
    ethn = "Ethnicity", 
    contraception_method = "Contraception", 
    gender_sexual_partner = "Gender of sexual partners"
  )

Create Table 1 (by arm)

Code
table1_data |> 
  select(-pid) |>
  rename_with(~ str_to_title(.x)) |>  
  tbl_summary(
    by = Arm,
    type =
      list(
        Sexual_partners_past_month ~ "continuous",
        Sexual_partners_lifetime ~ "continuous"
      ),
    statistic = list(
      all_continuous() ~ "{mean} ({min}-{max})",
      all_categorical() ~ "{n} ({p}%)"
    )
  )  |> 
  add_p(include = -Site)
Characteristic Pl
N = 191
LC-106-7
N = 211
LC-106-3
N = 151
LC-106-o
N = 151
LC-115
N = 201
p-value2
Site





    CAP 14 (74%) 14 (67%) 14 (93%) 14 (93%) 14 (70%)
    MGH 5 (26%) 7 (33%) 1 (6.7%) 1 (6.7%) 6 (30%)
Age 29 (20-38) 29 (18-40) 26 (21-34) 26 (19-35) 29 (20-40) 0.5
Race




0.5
    Asian 1 (5.3%) 0 (0%) 1 (6.7%) 0 (0%) 0 (0%)
    Black 14 (74%) 18 (86%) 14 (93%) 15 (100%) 18 (90%)
    Coloured 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    White 2 (11%) 3 (14%) 0 (0%) 0 (0%) 1 (5.0%)
    Other 1 (5.3%) 0 (0%) 0 (0%) 0 (0%) 1 (5.0%)
    Prefer not to answer 1 (5.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Ethnicity




0.8
    Not hispanic/latino/spanish 3 (60%) 6 (86%) 1 (100%) 1 (100%) 4 (67%)
    hispanic/latino/spanish 2 (40%) 1 (14%) 0 (0%) 0 (0%) 2 (33%)
    Unknown 14 14 14 14 14
Number of partners in lifetime 6 (1-10) 11 (1-90) 4 (1-10) 6 (1-20) 5 (1-10) 0.8
Number of partners in past month 1 (1-2) 1 (0-2) 1 (0-2) 1 (0-3) 1 (0-2) 0.4
Contraception




>0.9
    Continuous combined oral contraceptive pills (skip placebo) 3 (16%) 5 (24%) 1 (6.7%) 1 (6.7%) 3 (15%)
    Contraceptive vaginal ring 0 (0%) 1 (4.8%) 0 (0%) 0 (0%) 0 (0%)
    Cyclic combined oral contraceptive pills 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (5.0%)
    Depo provera 11 (58%) 10 (48%) 12 (80%) 11 (73%) 11 (55%)
    Hormonal implant 1 (5.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Hormone containing IUD 2 (11%) 2 (9.5%) 0 (0%) 0 (0%) 2 (10%)
    Net-EN 2 (11%) 2 (9.5%) 2 (13%) 3 (20%) 2 (10%)
    Other 0 (0%) 1 (4.8%) 0 (0%) 0 (0%) 1 (5.0%)
Report food insecurity in past 12 months 7 (37%) 7 (33%) 6 (40%) 5 (33%) 6 (30%) >0.9
Gender of sexual partners




0.8
    No sex 1 (5.3%) 3 (14%) 1 (6.7%) 1 (6.7%) 3 (15%)
    Only man 18 (95%) 18 (86%) 14 (93%) 14 (93%) 17 (85%)
1 n (%); Mean (Min-Max)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

Create Table 1 (by site)

Code
table1_data |> 
  select(arm, age, race, site, food_insecurity, contraception_method, sexual_partners_past_month, sexual_partners_lifetime) |>
  rename_with(~ str_to_title(.x)) |>  
  tbl_summary(
    by = Site,
    type =
      list(
        Sexual_partners_past_month ~ "continuous",
        Sexual_partners_lifetime ~ "continuous"
      ),
    statistic = list(
      all_continuous() ~ "{mean} ({min}-{max})",
      all_categorical() ~ "{n} ({p}%)"
    )
  )  |> 
  add_p()
Characteristic CAP
N = 701
MGH
N = 201
p-value2
Arm

0.2
    Pl 14 (20%) 5 (25%)
    LC-106-7 14 (20%) 7 (35%)
    LC-106-3 14 (20%) 1 (5.0%)
    LC-106-o 14 (20%) 1 (5.0%)
    LC-115 14 (20%) 6 (30%)
Age 27 (18-37) 32 (22-40) <0.001
Race

<0.001
    Asian 0 (0%) 2 (10%)
    Black 70 (100%) 9 (45%)
    Coloured 0 (0%) 0 (0%)
    White 0 (0%) 6 (30%)
    Other 0 (0%) 2 (10%)
    Prefer not to answer 0 (0%) 1 (5.0%)
Report food insecurity in past 12 months 28 (40%) 3 (15%) 0.038
Contraception

<0.001
    Continuous combined oral contraceptive pills (skip placebo) 5 (7.1%) 8 (40%)
    Contraceptive vaginal ring 0 (0%) 1 (5.0%)
    Cyclic combined oral contraceptive pills 0 (0%) 1 (5.0%)
    Depo provera 54 (77%) 1 (5.0%)
    Hormonal implant 0 (0%) 1 (5.0%)
    Hormone containing IUD 0 (0%) 6 (30%)
    Net-EN 11 (16%) 0 (0%)
    Other 0 (0%) 2 (10%)
Number of partners in past month 1 (0-3) 1 (0-2) 0.4
Number of partners in lifetime 4 (1-20) 14 (4-90) <0.001
1 n (%); Mean (Min-Max)
2 Fisher’s exact test; Wilcoxon rank sum test; Pearson’s Chi-squared test

And finally we create one Table 1 for each site

Code
table1_data |> 
  filter(site == "CAP") |> 
  select(-c(pid, site)) |>
  rename_with(~ str_to_title(.x)) |>  
  tbl_summary(
    by = Arm,
    type =
      list(
        Sexual_partners_past_month ~ "continuous",
        Sexual_partners_lifetime ~ "continuous"
      ),
    statistic = list(
      all_continuous() ~ "{mean} ({min}-{max})",
      all_categorical() ~ "{n} ({p}%)"
    )
  ) |> 
  add_p()|>
  modify_caption("**Site = CAP**")
Site = CAP
Characteristic Pl
N = 141
LC-106-7
N = 141
LC-106-3
N = 141
LC-106-o
N = 141
LC-115
N = 141
p-value2
Age 28 (20-36) 26 (18-36) 26 (21-34) 26 (19-35) 27 (20-37) >0.9
Race




>0.9
    Asian 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Black 14 (100%) 14 (100%) 14 (100%) 14 (100%) 14 (100%)
    Coloured 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    White 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Other 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Prefer not to answer 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Ethnicity





    Not hispanic/latino/spanish 0 (NA%) 0 (NA%) 0 (NA%) 0 (NA%) 0 (NA%)
    hispanic/latino/spanish 0 (NA%) 0 (NA%) 0 (NA%) 0 (NA%) 0 (NA%)
    Unknown 14 14 14 14 14
Number of partners in lifetime 5 (1-10) 4 (1-7) 4 (1-10) 6 (1-20) 4 (1-9) 0.8
Number of partners in past month 1 (1-2) 1 (1-2) 1 (0-2) 1 (0-3) 1 (1-2) 0.2
Contraception




0.9
    Continuous combined oral contraceptive pills (skip placebo) 1 (7.1%) 2 (14%) 0 (0%) 0 (0%) 2 (14%)
    Contraceptive vaginal ring 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Cyclic combined oral contraceptive pills 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Depo provera 11 (79%) 10 (71%) 12 (86%) 11 (79%) 10 (71%)
    Hormonal implant 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Hormone containing IUD 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Net-EN 2 (14%) 2 (14%) 2 (14%) 3 (21%) 2 (14%)
    Other 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Report food insecurity in past 12 months 7 (50%) 5 (36%) 6 (43%) 5 (36%) 5 (36%) >0.9
Gender of sexual partners




>0.9
    No sex 1 (7.1%) 2 (14%) 1 (7.1%) 1 (7.1%) 2 (14%)
    Only man 13 (93%) 12 (86%) 13 (93%) 13 (93%) 12 (86%)
1 Mean (Min-Max); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test
Code
table1_data |> 
  filter(site == "MGH") |> 
  select(-c(pid, site)) |>
  rename_with(~ str_to_title(.x)) |>  
  tbl_summary(
    by = Arm,
    type =
      list(
        Sexual_partners_past_month ~ "continuous",
        Sexual_partners_lifetime ~ "continuous"
      ),
    statistic = list(
      all_continuous() ~ "{mean} ({min}-{max})",
      all_categorical() ~ "{n} ({p}%)"
    )
  ) |> 
  add_p()|>
  modify_caption("**Site = MGH**")
Site = MGH
Characteristic Pl
N = 51
LC-106-7
N = 71
LC-106-3
N = 11
LC-106-o
N = 11
LC-115
N = 61
p-value2
Age 32 (26-38) 33 (24-40) 25 (25-25) 35 (35-35) 33 (22-40) 0.7
Race




0.13
    Asian 1 (20%) 0 (0%) 1 (100%) 0 (0%) 0 (0%)
    Black 0 (0%) 4 (57%) 0 (0%) 1 (100%) 4 (67%)
    Coloured 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    White 2 (40%) 3 (43%) 0 (0%) 0 (0%) 1 (17%)
    Other 1 (20%) 0 (0%) 0 (0%) 0 (0%) 1 (17%)
    Prefer not to answer 1 (20%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Ethnicity




0.8
    Not hispanic/latino/spanish 3 (60%) 6 (86%) 1 (100%) 1 (100%) 4 (67%)
    hispanic/latino/spanish 2 (40%) 1 (14%) 0 (0%) 0 (0%) 2 (33%)
Number of partners in lifetime 9 (7-10) 25 (6-90) 7 (7-7) 10 (10-10) 7 (4-10) 0.059
Number of partners in past month 1 (1-2) 1 (0-2) 1 (1-1) 1 (1-1) 1 (0-2) >0.9
Contraception




>0.9
    Continuous combined oral contraceptive pills (skip placebo) 2 (40%) 3 (43%) 1 (100%) 1 (100%) 1 (17%)
    Contraceptive vaginal ring 0 (0%) 1 (14%) 0 (0%) 0 (0%) 0 (0%)
    Cyclic combined oral contraceptive pills 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (17%)
    Depo provera 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (17%)
    Hormonal implant 1 (20%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Hormone containing IUD 2 (40%) 2 (29%) 0 (0%) 0 (0%) 2 (33%)
    Net-EN 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    Other 0 (0%) 1 (14%) 0 (0%) 0 (0%) 1 (17%)
Report food insecurity in past 12 months 0 (0%) 2 (29%) 0 (0%) 0 (0%) 1 (17%) 0.8
Gender of sexual partners




>0.9
    No sex 0 (0%) 1 (14%) 0 (0%) 0 (0%) 1 (17%)
    Only man 5 (100%) 6 (86%) 1 (100%) 1 (100%) 5 (83%)
1 Mean (Min-Max); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test

Primary outcomes (Table 2)

Colonization by week 5

Code
col_by_week5 <- 
  mae@colData |> as_tibble() |> 
  select(pid, site, randomized, arm, ITT, mITT, PP) |> 
  distinct() |> 
  filter(randomized, mITT) |> 
  left_join(
    mae[["col_LBP_mg"]] |> 
      as_tibble() |> 
      dplyr::left_join(
        mae@colData |> as_tibble() |> select(uid, pid, visit_code),
        by = join_by(.sample == uid)
      ) |> 
      dplyr::filter(visit_code == "1500", .feature == "colonized_LBP_by_mg")
  ) |> 
  mutate(LBP_colonization_by_week5 = outcome |> replace_na(FALSE)) 
  

# by qPCR 

col_by_week5_qPCR <-
  mae@colData |> as_tibble() |> 
  select(pid, site, randomized, arm, ITT, mITT, PP) |> 
  distinct() |> 
  filter(randomized, mITT) |> 
  left_join(
    mae[["col_LBP_qPCR"]] |> 
      as_tibble() |> 
      dplyr::left_join(
        mae@colData |> as_tibble() |> select(uid, pid, visit_code),
        by = join_by(.sample == uid)
      ) |> 
      dplyr::filter(visit_code == "1500", .feature == "colonized_LBP_by_qpcr")
  ) |> 
  mutate(LBP_colonization_by_week5 = outcome |> replace_na(FALSE)) 


detected_by_week5_qPCR <-
  mae@colData |> as_tibble() |> 
  select(pid, site, randomized, arm, ITT, mITT, PP) |> 
  distinct() |> 
  filter(randomized, mITT) |> 
  left_join(
    mae[["col_LBP_qPCR"]] |> 
      as_tibble() |> 
      dplyr::left_join(
        mae@colData |> as_tibble() |> select(uid, pid, visit_code),
        by = join_by(.sample == uid)
      ) |> 
      dplyr::filter(visit_code == "1500", .feature == "LBP_detected_by_qpcr")
  ) |> 
  mutate(LBP_detected_by_week5 = outcome |> replace_na(FALSE)) 


# crispatus dominance 

crisp_dom_by_5week <- 
  mae@colData |> as_tibble() |> 
  select(pid, site, randomized, arm, ITT, mITT, PP) |> 
  distinct() |> 
  filter(randomized, mITT) |> 
  left_join(
    mae[["col_crispatus_mg"]] |> 
      as_tibble() |> 
      dplyr::left_join(
        mae@colData |> as_tibble() |> select(uid, pid, visit_code),
        by = join_by(.sample == uid)
      ) |> 
      dplyr::filter(visit_code == "1500", .feature == "crispatus_dominance_by_mg")
  ) |> 
  mutate(crisp_dom_by_5week = outcome |> replace_na(FALSE)) 

Visualization of the primary outcome (and a few selected secondary outcomes)

Code
col_by_week5 |> 
  arrange(site, arm, LBP_colonization_by_week5) |>
  group_by(site, arm) |> 
  mutate(participant_number = row_number() |> factor()) |> 
  ungroup() |> 
  ggplot(aes(x = participant_number, y = site |> fct_rev(), col = site, fill = site, shape = LBP_colonization_by_week5)) +
  geom_point(size = 3) +
  facet_grid(. ~ arm, scales = "free_x", space = "free_x") +
  guides(color = "none", fill = "none") +
  scale_shape_manual("Colonized by LBP by week 5", values = c(1, 16)) +
  scale_color_manual(values = site_colors) + 
  xlab("Participant number") + ylab("") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  ) 

Code
col_by_week5_qPCR |> 
  arrange(site, arm, LBP_colonization_by_week5) |>
  group_by(site, arm) |> 
  mutate(participant_number = row_number() |> factor()) |> 
  ungroup() |> 
  ggplot(aes(x = participant_number, y = site |> fct_rev(), col = site, fill = site, shape = LBP_colonization_by_week5)) +
  geom_point(size = 3) +
  facet_grid(. ~ arm, scales = "free_x", space = "free_x") +
  guides(color = "none", fill = "none") +
  scale_shape_manual("Colonized by LBP by week 5", values = c(1, 16)) +
  scale_color_manual(values = site_colors) + 
  xlab("Participant number") + ylab("") +
  ggtitle("Colonization based on the relative abundances estimated by qPCR") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), 
    plot.title = element_text(hjust = 0.5)
  ) 

Code
detected_by_week5_qPCR |> 
  arrange(site, arm, LBP_detected_by_week5) |>
  group_by(site, arm) |> 
  mutate(participant_number = row_number() |> factor()) |> 
  ungroup() |> 
  ggplot(aes(x = participant_number, y = site |> fct_rev(), col = site, fill = site, shape = LBP_detected_by_week5)) +
  geom_point(size = 3) +
  facet_grid(. ~ arm, scales = "free_x", space = "free_x") +
  guides(color = "none", fill = "none") +
  scale_shape_manual("Colonized by LBP by week 5", values = c(1, 16)) +
  scale_color_manual(values = site_colors) + 
  xlab("Participant number") + ylab("") +
  ggtitle("At least 2 LBP strains detected at the same visit by week 5 qPCR") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), 
    plot.title = element_text(hjust = 0.5)
  ) 

Code
crisp_dom_by_5week |> 
  arrange(site, arm, crisp_dom_by_5week) |>
  group_by(site, arm) |> 
  mutate(participant_number = row_number() |> factor()) |> 
  ungroup() |> 
  ggplot(aes(x = participant_number, y = site |> fct_rev(), col = site, fill = site, shape = crisp_dom_by_5week)) +
  geom_point(size = 3) +
  facet_grid(. ~ arm, scales = "free_x", space = "free_x") +
  guides(color = "none", fill = "none") +
  scale_shape_manual("Colonized by LBP by week 5", values = c(1, 16)) +
  scale_color_manual(values = site_colors) + 
  xlab("Participant number") + ylab("") +
  ggtitle("L. crispatus dominance (≥ 50% by metagenomics)") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), 
    plot.title = element_text(hjust = 0.5)
  ) 

Code
summary_table <- 
  col_by_week5 |> 
  group_by(site, arm) |> 
  summarize(
    n = n(), 
    n_success = sum(LBP_colonization_by_week5),
    .groups = "drop"
  ) |> 
  mutate(
    p = n_success / n,
    CI = binom::binom.confint(n_success, n, method = "exact")
  )
Code
table_2 <- 
  summary_table |> 
  mutate(
    N_per_site_and_arm = str_c("N = ", n),
    LBP_strain_detected = str_c(n_success, " (", round(p * 100)," %)"),
    CI_text = str_c(round(CI$lower * 100), "% - ", round(CI$upper * 100), "%")
  ) |> 
  dplyr::select(
    site, arm, N_per_site_and_arm, LBP_strain_detected, CI_text
  ) |> 
  pivot_longer(-c(site, arm)) |>
  mutate(
    name = 
      case_when(
        name == "N_per_site_and_arm" ~ "N participants",
        name == "LBP_strain_detected" ~ "n (%) participants\nwith LBP strain detected",
        name == "CI_text" ~ "95% CI"
      )
  ) |> 
  pivot_wider(
    id_cols = c(site, name), 
    names_from = arm,
    values_from = value, values_fill = ""
    )
Code
table_2 |> 
  group_by(site) |> 
  gt(caption = "Table 1: Colonization by week 5 by arm and site", row_group_as_column = TRUE) |> 
  cols_width(
    "name" ~ px(200),
    everything() ~ px(120)
  ) |> 
  cols_label(
    name = "",
    # Blinded = "All blinded arms",
    Pl = "Placebo",
    `LC-106-7` = "LC-106<br>7 days",
    `LC-106-3` = "LC-106<br>3 days",
    `LC-106-o` = "LC-106<br>overlap",
    `LC-115` = "LC-115<br>7 days",
    .fn = md
  )
Table 1: Colonization by week 5 by arm and site
Placebo LC-106
7 days
LC-106
3 days
LC-106
overlap
LC-115
7 days
CAP N participants N = 14 N = 14 N = 14 N = 14 N = 14
n (%) participants with LBP strain detected 0 (0 %) 9 (64 %) 7 (50 %) 7 (50 %) 10 (71 %)
95% CI 0% - 23% 35% - 87% 23% - 77% 23% - 77% 42% - 92%
MGH N participants N = 5 N = 7 N = 1 N = 1 N = 6
n (%) participants with LBP strain detected 1 (20 %) 6 (86 %) 1 (100 %) 1 (100 %) 6 (100 %)
95% CI 1% - 72% 42% - 100% 3% - 100% 3% - 100% 54% - 100%

Tests

We test for differences between the placebo arm and each active arm using Fisher’s exact test (exhaustive permutation test).

Data from both site are merged for each arms given the low numbers of participants at MGH.

The \(p\)-values are adjusted for multiple testing using the Benjamini-Hochberg method.

Code
arms_to_test <- col_by_week5$arm |> unique() |> setdiff("Pl")

fisher_test_results <-
  map(
  arms_to_test,
  ~ {
    arm_to_test <- .x
    tmp <- col_by_week5 |> filter(arm %in% c("Pl", arm_to_test))
    table_for_test <- table(tmp$LBP_colonization_by_week5, tmp$arm |> fct_drop())
    test_res <- fisher.test(table_for_test, alternative = "greater")
    tibble(
      arm = arm_to_test,
      p_value = test_res$p.value,
      OR = test_res$estimate,
      lower_CI = test_res$conf.int[1],
      upper_CI = test_res$conf.int[2]
    )
  }
) |> 
  bind_rows() |> 
  mutate(
    adj_p_value = p_value |> p.adjust(method = "BH")
  )
Code
test_results <- 
  fisher_test_results |> 
  mutate(
    adj_p_value = adj_p_value |> scales::pvalue(accuracy = 0.001),
    OR = OR |> round(2) |> as.character(),
    CI = str_c(lower_CI |> round(2), " - ", upper_CI |> round(2))
    ) |> 
  select(arm, adj_p_value, OR, CI) |>
  pivot_longer(cols = -arm, names_to = "name", values_to = "value") |>
  pivot_wider(names_from = arm, values_from = value) |> 
  mutate(
    name = 
      case_when(
        name == "adj_p_value" ~ "Adj. p-value",
        name == "OR" ~ "OR",
        name == "CI" ~ "95% CI",
        TRUE ~ name
      )
  )
Code
table_2_with_model_results <- 
  bind_rows(
    table_2,
    test_results
  ) |> 
  mutate(
    site = site |> str_replace_na(""),
    Pl = 
      case_when(
        name == "Adj. p-value" ~ "Ref.",
        name == "OR" ~ "",
        str_detect(name, "CI") ~ "",
        TRUE ~ Pl
      )
  )
Code
table_2_with_model_results |> 
  group_by(site) |>
  gt(caption = "Table 2: Colonization by week 5 by arm and site", row_group_as_column = TRUE) |> 
  cols_width(
    "name" ~ px(150),
    everything() ~ px(120)
  ) |> 
  cols_label(
    name = "",
    Pl = "Placebo",
    `LC-106-7` = "LC-106<br>7 days",
    `LC-106-3` = "LC-106<br>3 days",
    `LC-106-o` = "LC-106<br>overlap",
    `LC-115` = "LC-115<br>7 days",
    .fn = md
  )
Table 2: Colonization by week 5 by arm and site
Placebo LC-106
7 days
LC-106
3 days
LC-106
overlap
LC-115
7 days
CAP N participants N = 14 N = 14 N = 14 N = 14 N = 14
n (%) participants with LBP strain detected 0 (0 %) 9 (64 %) 7 (50 %) 7 (50 %) 10 (71 %)
95% CI 35% - 87% 23% - 77% 23% - 77% 42% - 92%
MGH N participants N = 5 N = 7 N = 1 N = 1 N = 6
n (%) participants with LBP strain detected 1 (20 %) 6 (86 %) 1 (100 %) 1 (100 %) 6 (100 %)
95% CI 42% - 100% 3% - 100% 3% - 100% 54% - 100%
Adj. p-value Ref. <0.001 0.002 0.002 <0.001
OR 39.76 18.61 18.61 60.46
95% CI 5.69 - Inf 2.48 - Inf 2.48 - Inf 8.17 - Inf

Model

Code
col_by_week5_agg <- 
  col_by_week5 |>
  group_by(site, arm) |>
  summarise(success = LBP_colonization_by_week5 |> sum(), ntrials = n()) 

bfit <- 
  brm(
    success | trials(ntrials) ~ arm + site + site:arm, 
    data = col_by_week5_agg, 
    family = binomial("logit")
    )
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.3)’
using SDK: ‘MacOSX15.5.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Users/laurasymul/Library/R/arm64/4.4/library/Rcpp/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/unsupported"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/BH/include" -I"/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/src/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/RcppParallel/include/"  -I"/Users/laurasymul/Library/R/arm64/4.4/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/Core:19:
/Users/laurasymul/Library/R/arm64/4.4/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
  679 | #include <cmath>
      |          ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 2.3e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 0.845 seconds (Warm-up)
Chain 1:                1.002 seconds (Sampling)
Chain 1:                1.847 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2e-06 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 0.941 seconds (Warm-up)
Chain 2:                1.1 seconds (Sampling)
Chain 2:                2.041 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 2e-06 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 0.774 seconds (Warm-up)
Chain 3:                0.908 seconds (Sampling)
Chain 3:                1.682 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 2e-06 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 0.843 seconds (Warm-up)
Chain 4:                1.136 seconds (Sampling)
Chain 4:                1.979 seconds (Total)
Chain 4: 
Code
summary(bfit)
plot(bfit, ask = FALSE)
bind_cols(col_by_week5_agg, predict(bfit))
# loo(bfit, moment_match = TRUE, reloo = TRUE)
# pp_check(bfit)
Code
plot_res <- plot(conditional_effects(bfit), ask = FALSE)
Code
plot_res <- lapply(plot_res, function(p) {
  p + scale_color_manual(values = site_colors)
})

Reduce(`+`, plot_res) + 
  plot_layout(widths = c(2.2, 1, 3))

Code
model_results <- summary(bfit)$fixed
model_results <- 
  model_results |> 
  rownames_to_column("term") |> 
  as_tibble() |> 
  mutate(
    factor = 
      case_when(
        str_detect(term, ":site") ~ "Arm x Site",
        str_detect(term, "^site") ~ "Site",
        str_detect(term, "^arm") ~ "Arm",
        TRUE ~ "Ref.",
      ),
    factor_level = 
      term |> 
      str_remove("arm") |> str_remove("site") |> str_replace(":"," x ") |> 
      str_replace("6M", "6-") |> str_replace("CM", "C-")
  ) |> 
  mutate(
    OR = ifelse(factor == "Ref.", NA, exp(Estimate)),
    lower_CI = ifelse(factor == "Ref.", NA, exp(`l-95% CI`)),
    upper_CI = ifelse(factor == "Ref.", NA, exp(`u-95% CI`))
  ) 
Code
model_results |> 
  select(factor, factor_level, OR, lower_CI, upper_CI) |>
  gt() |> 
  gt::fmt_number(
    decimals = 2, n_sigfig = 3
  )
Code
model_results_arm <- 
  model_results |> 
  filter(factor == "Arm") |> 
  mutate(
    OR = OR |> round(2) |> as.character(),
    CI = str_c(lower_CI |> round(2), " - ", upper_CI |> round(2))
    ) |> 
  select(factor_level, OR, CI) |>
  pivot_longer(cols = -factor_level, names_to = "name", values_to = "value") |>
  pivot_wider(names_from = factor_level, values_from = value) 
Code
table_2_with_model_results <- 
  bind_rows(
    table_2,
    model_results_arm
  ) |> 
  mutate(
    site = site |> str_replace_na(""),
    Pl = 
      case_when(
        name == "OR" ~ "Ref.",
        name == "CI" ~ "",
        TRUE ~ Pl
      )
  )
Code
table_2_with_model_results |> 
  group_by(site) |>
  gt(caption = "Table 2: Colonization by week 5 by arm and site", row_group_as_column = TRUE) |> 
  cols_width(
    "name" ~ px(200),
    everything() ~ px(120)
  ) |> 
  cols_label(
    name = "",
    Pl = "Placebo",
    `LC-106-7` = "LC-106<br>7 days",
    `LC-106-3` = "LC-106<br>3 days",
    `LC-106-o` = "LC-106<br>overlap",
    `LC-115` = "LC-115<br>7 days",
    .fn = md
  )

Secondary outcomes (Figures 2-4)

Kinetics of colonization

Total relative abundances of LBP isolates

Code
total_rel_ab_of_LBP <- 
  mae[["mg"]] |> 
  as_tibble() |> 
  filter(!is.na(LBP)) |> 
  group_by(.sample, uid) |>
  summarize(
    total_LBP_rel_ab = sum(rel_ab, na.rm = TRUE),
    .groups = "drop"
  )

total_rel_ab_of_LBP <- 
  total_rel_ab_of_LBP |> 
  dplyr::left_join(mae |> colData() |> as_tibble(), by = join_by(uid))
Code
g_tot_prop_LBP_strains <- 
  total_rel_ab_of_LBP |>
  filter(visit_type == "Clinic", !is.na(arm)) |> 
  ggplot(aes(x = study_week, y = total_LBP_rel_ab, col = arm)) +
  geom_line(aes(group = pid), alpha = 0.5) +
  geom_point(size = 0.5, alpha = 0.5) +
  facet_grid(site ~ arm) + 
  scale_x_continuous("Study weeks", breaks = c(0:5,7,9,12), minor_breaks = 0:12) +
  scale_y_continuous(
    "Total relative abundances\nof LBP strains", labels = scales::percent,
    breaks = seq(0, 1, by = 0.2)
  ) +
  expand_limits(y = 0) +
  scale_color_manual(values = get_arm_colors()) +
  guides(col = "none") +
  ggtitle(make_title(mae)) 
Code
g_tot_prop_LBP_strains

Proportions of participants who colonized

Code
g_prop_colonized_by <- 
  total_rel_ab_of_LBP |> 
  filter(!is.na(arm)) |> 
  group_by(site, arm, study_week) |>
  summarize(
    prop_who_colonized_at = mean(total_LBP_rel_ab > 0.05), 
    CI_low = 
      prop.test(
        x = sum(total_LBP_rel_ab > 0.05), n = n(), 
        conf.level = 0.95)$conf.int[1],
    CI_high = 
      prop.test(
        x = sum(total_LBP_rel_ab > 0.05), n = n(), 
        conf.level = 0.95)$conf.int[2],
    .groups = "drop"
  ) |>
  ggplot(aes(x = study_week, y = prop_who_colonized_at, col = arm)) +
  geom_ribbon(
    aes(ymin = CI_low, ymax = CI_high, fill = arm), 
    alpha = 0.1, color = NA
  ) +
  geom_line() +
  geom_point() +
  facet_grid(site ~ .) + 
  scale_x_continuous("Study weeks", breaks = c(0:5,7,9,12), minor_breaks = 0:12) +
  scale_y_continuous(
    "Proportion of participants\nwith detected LBP strains", labels = scales::percent,
    breaks = seq(0, 1, by = 0.2)
  ) +
  expand_limits(y = 0) +
  scale_color_manual("Arm", values = get_arm_colors()) +
  scale_fill_manual("Arm", values = get_arm_colors()) +
  ggtitle(make_title(mae)) 

g_prop_colonized_by

Figure 2

Code
 (g_prop_colonized_by + ggtitle("")) +
  (g_tot_prop_LBP_strains + ggtitle("")) +
  plot_layout(guides = "collect", widths = c(1, 2)) +
  plot_annotation(tag_level = "a") &
  theme(legend.position = "bottom")

Microbiota trajectories

Code
rel_abs <- 
  mae[["mg"]] |> 
  as_tibble() |> 
  select(
    .feature, .sample, uid, rel_ab, poor_quality_mg_data,
    taxon_label, genus, LBP, strain_origin
    ) 
Code
rel_abs <- 
  rel_abs |> 
  group_by(.feature) |> 
  mutate(max_rel_ab = max(rel_ab, na.rm = TRUE), mean_rel_ab = mean(rel_ab, na.rm = TRUE)) |> 
  ungroup() |> 
  arrange(is.na(LBP), -mean_rel_ab) |> 
  mutate(.feature = .feature |> fct_inorder()) |> 
  mutate(
    taxon = 
      case_when(
        as.numeric(.feature) <= 29 ~ taxon_label,
        TRUE ~ "Other"
      )
  ) |> 
  arrange(is.na(LBP), LBP, genus != "Lactobacillus", -mean_rel_ab) |> 
  mutate(taxon = taxon |> fct_inorder() |> fct_relevel("Other", after = Inf))
Code
rel_abs_agg <- 
  rel_abs |> 
  group_by(uid, taxon, LBP, strain_origin, poor_quality_mg_data) |> 
  summarize(rel_ab = sum(rel_ab), .groups = "drop") |> 
  dplyr::left_join(mae |> colData() |> as_tibble(), by = join_by(uid)) 
Code
g_trajectories <-
  rel_abs_agg |>
  mutate(study_week = case_when(visit_code == "0000" ~ -1, TRUE ~ study_week)) |>
  filter(mITT, randomized != 0, visit_type == "Clinic", !is.na(study_week), !poor_quality_mg_data) |>
  group_by(pid) |> 
  mutate(
    tot_LC115 = sum(rel_ab[LBP == "LC-115"]),
    tot_LBP = sum(rel_ab[!is.na(LBP)])
    ) |> 
  ungroup() |> 
  arrange(desc(PP), -tot_LC115, -tot_LBP) |> 
  mutate(pid = pid |> fct_inorder()) |> 
  ggplot(aes(y = rel_ab, x = pid, fill = taxon, alpha = ifelse(PP, "PP", "mITT"))) +
  geom_col() +
  facet_grid(study_week ~ arm + site, scales = "free", space = "free") +
  # theme(panel.spacing.x = unit(0, "points")) +
  scale_fill_manual(
    values = get_taxa_colors(rel_abs_agg$taxon |> levels()),
    guide = guide_legend(ncol = 1)
    ) +
  scale_alpha_discrete("Population", range = c(0.5, 1)) +
  scale_y_continuous(
    "relative abundance", labels = scales::percent,
    breaks = seq(0, 0.5, by = 0.5), minor_breaks = seq(0, 1, by = 0.25)
  ) +
  scale_x_discrete("Participants") +
  theme(
    strip.text.y = element_text(angle = 0, hjust = 0),
    strip.text.x = element_text(angle = 90, hjust = 0),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
Code
g_trajectories

Individual LBP strains by sites

Code
LBP_strains <- 
  mae[["mg"]] |> 
  as_tibble() |> 
  filter(!is.na(LBP)) |> 
  select(.feature, .sample, uid, rel_ab, LBP, strain_origin) |> 
  left_join(mae |> colData() |> as_tibble())
Code
LBP_strains |> 
  filter(!is.na(arm), !is.na(study_week), study_week >= 2) |> 
  ggplot() +
  aes(x = .feature, y = rel_ab, col = site) +
  geom_point(alpha = 0.5, size = 0.5) +
  # geom_boxplot(outlier.size = 0.5) +
  facet_grid(study_week ~ LBP + strain_origin , scales = "free_x", space = "free_x") +
  xlab("LBP strain") +
  scale_y_continuous(
    "Relative abundance bacteria", labels = scales::percent,
    breaks = seq(0, 1, by = 0.2)
  ) +
  scale_color_manual(values = site_colors) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  ) 

Code
LBP_strains |> 
  filter(!is.na(arm), !is.na(study_week), study_week >= 2) |> 
  ggplot() +
  aes(x = .feature, y = rel_ab, col = site) +
  # geom_point(alpha = 0.5, size = 0.5) +
  geom_boxplot(outlier.size = 0.5) +
  facet_grid(study_week ~ LBP + strain_origin , scales = "free_x", space = "free_x") +
  xlab("LBP strain") +
  scale_y_continuous(
    "Relative abundance bacteria", labels = scales::percent,
    breaks = seq(0, 1, by = 0.2)
  ) +
  scale_color_manual(values = site_colors) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  ) 

Code
LBP_strains |> 
  filter(!is.na(arm), !is.na(study_week), study_week >= 2) |> 
  ggplot() +
  aes(x = study_week, y = rel_ab, col = strain_origin) +
  geom_point(alpha = 0.5, size = 0.5) +
  geom_line(aes(group = pid), alpha = 0.5) +
  facet_grid(site ~ LBP |> str_replace("&", "&\n") + strain_origin + .feature , scales = "free_x", space = "free_x") +
  xlab("Study week") +
  scale_y_continuous(
    "Relative abundance bacteria", labels = scales::percent,
    breaks = seq(0, 1, by = 0.2)
  ) +
  #scale_color_manual(values = colorspace::darken(scale_colour_hue()$palette(2), amount = 0.4)) +
  scale_color_manual("Strain origin", values = strain_origin_colors) +
  scale_x_continuous("Study weeks", breaks = c(0:5,7,9,12), minor_breaks = 0:12) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )